*********************************************************************
*		Purpose: Clean 2017 PSID cross-section 					    *
*                (adds individuals to 1960s/70s birth cohorts)	    *
*																	*
*		Creates: psid2017_cleaned.dta								*																				*
*********************************************************************
	
* Session setup: empty workspace, no output paging, and the random-number
* generator state taken from the global macro rdmseed.
* NOTE(review): rdmseed is defined outside this file; confirm it holds a full
* rngstate string, which is what "set rngstate" expects, and not a plain seed.
clear all
set more off
set rngstate $rdmseed

cd "$Mydirectory1/1_DataSources/PSID17_CrossSection"

*-------------------------------------------------------------*
*-------------------------------------------------------------*

	* Bring in the raw data, then run the PSID-provided clean-up
	foreach dofile in J320108 J320108_formats {
		do ./rawdata/`dofile'.do
	}

	* Move the identifier and relationship variables to the front
	order ER30001 ER30002 ER66009 ER66002 ER34501 ER34502 ER34503
	
*----------------------------------------------------------------------*
*----------------------------------------------------------------------*

*******************************
* Special PSID Restrictions   *
*******************************
	// 1. Keep only reference persons and spouses/partners 
		  /*Notes: (1) In previous survey waves, reference persons were called 
		               "heads" and spouses/partners were referred to as 
		               wives and "wives" (cohabitating females), respectively.
		           (2) Will exclude individuals whose relationship to head 
		               is first-year cohabitator of head, uncooperative legal 
		               spouse, or uncooperative partner. 
				   (3) Most demographic variables are asked only of reference
				       persons and spouses/partners.
		  */
	keep if inlist(ER34503,10,20,22) 
	tab ER34503,m 
	tab ER32000 ER34503,m 
	
	// 2. Keep the correct reference person 
		  /*Note: In some instances, two people in the same family 
		          are labelled as the "reference person", but one person
		          is actually non-response in the current wave. As recommended
		          by the PSID, will use the sequence number to figure out who 
		          the correct reference person is. */

	* taghead = 1 for rows coded as reference person (ER34503==10)
	gen taghead = ER34503==10 

	* Count reference persons within each ER30001 x ER66002 group.
	* The full name taghead is spelled out: the previous "tag" only worked
	* through variable-name abbreviation, which fails under "set varabbrev off"
	* or as soon as another variable starting with "tag" exists.
	bysort ER30001 ER66002: egen totalheads = total(taghead==1) 
	tab totalheads, m 
	
	* In groups with more than one reference person, drop rows whose
	* sequence number (ER34502) is above 50
	drop if totalheads>1 & ER34502>50 //drop "fake" reference persons 
	
	* Re-count to verify the result of the drop
	bysort ER30001 ER66002: egen totalheads2 = total(taghead==1) 
	tab totalheads2, m //confirmed: 1 reference person per family now 
	drop totalhead*
	
**************************************
* Cross-sectional, ind-level weight  *
**************************************
	* Clone ER34651 (values, labels, storage type) under an analysis name
	clonevar weight_psid2017 = ER34651

/*-----------------------------------------------------
		CLEANING
------------------------------------------------------*/

*****************************
* Demographics *
*****************************

*****************
* Sex *
*****************
	generate sex = ER32000
	tabulate sex, missing

*****************
* Age 
*****************

	/*Note: The "age-reference person" and "age-spouse" variables
	        were tried first, but they seem to be of lower quality
	        than the "age-individual" variable, so the individual
	        variable is the one used here.
	*/

	clonevar age = ER34504
	mvdecode age, mv(999)     //code 999 becomes system missing

	generate agesq = age^2

*------------------------------------------------------------------------ 
* Snapshot for the Appendix E exercise (drop table)
*------------------------------------------------------------------------ 

	* The year variable is added only to the saved copy; restore brings
	* back the data exactly as they were before the snapshot.
	preserve
		generate year = 2017
		save ./output/psid2017_4dropstable, replace
	restore

*----------------------------------*
*----------------------------------*

	* Keep ages 30 through 50 inclusive (rows with missing age are dropped)
	keep if age>=30 & age<=50
	tabulate age, missing

******************
* Marital Status *
******************

	/* Note: Cohabitating partners (relationship code 22) are
	         given a "0" on every marital status dummy. */

	* Currently married
	gen married = .
	replace married = 1 if ER34503==10 & ER66024==1            //reference person (married)
	replace married = 0 if ER34503==10 & inrange(ER66024,2,5)  //reference person (not married)
	replace married = 1 if ER34503==20 & inrange(ER34507,1,3)  //spouse
	replace married = 0 if ER34503==22 & inrange(ER34507,1,3)  //cohabitating partner
	tab married, m

	/* The four remaining dummies follow one recipe and differ only in the
	   ER66024 code that switches the dummy on:
	     1 = reference person with that ER66024 code
	     0 = reference person with any other ER66024 code in 1-5,
	         legal spouse already flagged married==1,
	         or cohabitating partner
	   Everything else stays missing. */
	local dummies never_married divorced widowed separated
	local codes 2 4 3 5

	forvalues i = 1/4 {
		local dum  : word `i' of `dummies'
		local code : word `i' of `codes'

		gen `dum' = .
		replace `dum' = 1 if ER66024==`code' & ER34503==10
		* rows set to 1 on the line above are skipped by the ==. guard
		replace `dum' = 0 if `dum'==. & ((inlist(ER66024,1,2,3,4,5) & ER34503==10) | (ER34503==20 & married==1))
		replace `dum' = 0 if `dum'==. & ER34503==22
		tab `dum', m
	}


	
******************
* Race *
******************

	/*Note: Race is asked separately of the reference person 
	        and the spouse, and each person is asked the 
	        question 4 times. Race is filled in from the first 
	        mention, then from the second mention if race is 
	        still missing, and so on.*/

	gen race = .

	//Reference person: variables whose label contains "race of ref"
	ds, has(varlabel "*race of ref*") insensitive
	global racehead  "`r(varlist)'"
	di "$racehead"

	foreach mention in $racehead {
		di "`mention'"
		* only codes 1 and 2 are carried over; first non-missing mention wins
		replace race = `mention' if inlist(`mention',1,2) & ER34503==10 & race==.
	}

	tab race, m
	assert race==. if ER34503!=10

	//Spouse or partner: variables whose label contains "race of spouse"
	ds, has(varlabel "*race of spouse*") insensitive
	global racespouse "`r(varlist)'"
	di "$racespouse"

	foreach mention in $racespouse {
		di "`mention'"
		replace race = `mention' if inlist(`mention',1,2) & ER34503!=10 & race==.
	}

	tab race, m

	gen black = race==2 if !missing(race)
	tab black, m
	tab race, m
	label variable black "Dummy =1 if Respondent is Black"
	/*Note: The "0" code of the Black binary variable 
	        consists of white individuals and certain
	        categories of Latinx individuals.*/

******************************
* Foreign Born (respondent)  *
******************************

	* Code 99 becomes missing in both state-of-birth variables
	mvdecode ER70874 ER70736, mv(99)

	* Reference persons: foreign born when ER70874 is 0
	gen foreignborn = (ER70874==0) if ER70874<. & ER34503==10
	assert foreignborn ==. if ER34503!=10

	* Spouses/partners: same rule on ER70736, skipping rows with ER67399==5
	replace foreignborn = (ER70736==0) if ER70736<. & ER67399!=5 & ER34503!=10 & foreignborn==.
	tab foreignborn, m

	**Restrict to native-born** (rows with unknown status are kept)
	drop if foreignborn!=0 & foreignborn!=.


******************************
* Foreign Born (father)  *
******************************

	gen fatherforeign = .

	//fathers of reference persons (ER70842)
	replace fatherforeign = 0 if ER34503==10 & inrange(ER70842,1,56)
	replace fatherforeign = 1 if ER34503==10 & ER70842==0

	//fathers of spouses or cohabitating partners (ER70704)
	replace fatherforeign = 0 if ER34503!=10 & ER67399!=5 & inrange(ER70704,1,56)
	replace fatherforeign = 1 if ER34503!=10 & ER67399!=5 & ER70704==0

	tab fatherforeign, m
	

**********************************************
* Geographic variables *
**********************************************

*******************
* State of birth * 
*******************

	* Codes 0 and 99 become missing; tabulate before and after as a check
	foreach var of varlist ER70874 ER70736 {
		tab `var', m
		replace `var' = . if `var'==0 | `var'==99
		tab `var', m
	}

	* Reference persons take ER70874, everyone else (spouses/partners) ER70736
	gen bpl = cond(ER34503==10, ER70874, ER70736)
	tab bpl, m

********************
* Region of birth * 
********************

	* bpl is compared directly against the state codes listed per region
	gen region4_born = .

		* Northeast: Connecticut, Maine, Massachusetts, New Hampshire, Rhode Island, Vermont, New Jersey, New York, Pennsylvania
	replace region4_born = 1 if inlist(bpl, 9, 23, 25, 33, 44, 50, 34, 36, 42)

		* Midwest: Illinois, Indiana, Michigan, Ohio, Wisconsin, Iowa, Kansas, Minnesota, Missouri, Nebraska, North Dakota, South Dakota
	replace region4_born = 2 if inlist(bpl, 17, 18, 26, 39, 55, 19, 20, 27, 29, 31, 38, 46)

		/* South: Delaware, District of Columbia, Florida, Georgia, Maryland, North Carolina, South Carolina, Virginia, West Virginia, Alabama,
				  Kentucky, Mississippi, Tennessee, Arkansas, Louisiana, Oklahoma, Texas */
	replace region4_born = 3 if inlist(bpl, 10, 11, 12, 13, 24, 37, 45, 51, 54, 1, 21, 28, 47, 5, 22, 40, 48)

		* West: Arizona, Colorado, Idaho, Montana, Nevada, New Mexico, Utah, Wyoming, California, Oregon, Washington (codes 2 and 15 are also assigned here)
	replace region4_born = 4 if inlist(bpl, 4, 8, 16, 30, 32, 35, 49, 56, 6, 41, 53, 2, 15)

	tab region4_born, m
	label var region4_born "Region R born"

******************************************
* Whether R was born in the south * 
******************************************

	gen bornsouth = region4_born==3 if region4_born!=.
	tab bornsouth, m

******************************************
* State and Region R grew up in * 
******************************************

*----------*
* state *
*----------*
	* Codes 0 and 99 become missing; tabulate before and after as a check
	foreach var of varlist ER70877 ER70739 {
		tab `var', m
		replace `var' = . if `var'==0 | `var'==99
		tab `var', m
	}

	* Reference persons take ER70877, spouses/partners ER70739
	gen state_childhood = cond(ER34503==10, ER70877, ER70739)
	tab state_childhood, m


*----------*
* region *
*----------*

	* Source variable and the suffix of the cleaned copy, in parallel order
	local sources ER71534 ER71536
	local roles ref spouse

	forvalues k = 1/2 {
		local src : word `k' of `sources'
		local who : word `k' of `roles'

		clonevar region_grewup_`who' = `src'

		* order matters: fold code 5 into 4 before blanking codes above 4
		replace region_grewup_`who' = 4 if region_grewup_`who'==5 //lump AK/HI in with the "west"
		replace region_grewup_`who' = . if region_grewup_`who'>4 | region_grewup_`who'==0 //>4=foreign country or DK/NA; 0=inappropriate
		tab region_grewup_`who', m
		tab `src', m
	}

	* Pick the role-appropriate cleaned region for each person
	gen region4_childhood = cond(ER34503==10, region_grewup_ref, region_grewup_spouse)
	tab region4_childhood, m


********************************
* Region of current residence * 
********************************
	
	/* Collapse ER71530 to four regions: code 5 is folded into 4 (West), and
	   every code above 4 becomes missing.
	   Code 0 is now also set to missing. This mirrors the treatment of the
	   "region grew up" variables in this file, and code 0 has no entry in the
	   region_l value label, so it would otherwise survive as an unlabeled
	   fifth category.
	   NOTE(review): confirm the meaning of code 0 for ER71530 in the codebook. */
	gen region4 = ER71530
	replace region4 =4 if region4==5
	replace region4 =. if region4>4 | region4==0
	tab region4,m 
	tab ER71530, m

	* One value label shared by the current and the childhood region
	label define region_l 1 "NORTHEAST" 2 "MIDWEST" 3 "SOUTH" 4 "WEST"
	label values region4 region4_childhood region_l
	tab region4_childhood, m
	tab region4, m

************************************************
* Whether R has moved state/region since birth * 
************************************************

	/* Both dummies use the same rules:
	     0 = current location equals every known earlier location
	         (birth and/or childhood)
	     1 = current location differs from a known earlier location
	     . = neither birth nor childhood location is known
	   NOTE(review): when the CURRENT location is missing but an earlier one is
	   known, the dummy is 1, because a missing value never equals a
	   non-missing one. Confirm that this is intended. */

*----------*
* state *
*----------*

	replace ER66004 = . if ER66004==0 | ER66004==99
	rename ER66004 current_state17

	gen moved_state = !(state_childhood==current_state17 & bpl==current_state17)
	replace moved_state = . if state_childhood==. & bpl==.
	replace moved_state = 0 if bpl==. & current_state17==state_childhood & current_state17!=.
	replace moved_state = 0 if state_childhood==. & current_state17==bpl & current_state17!=.
	tab moved_state, m

*----------*
* region *
*----------*

	gen moved_region = !(region4==region4_born & region4==region4_childhood)
	replace moved_region = . if region4_born==. & region4_childhood==.
	replace moved_region = 0 if region4_childhood==. & region4==region4_born & region4!=.
	replace moved_region = 0 if region4_born==. & region4==region4_childhood & region4!=.
	tab moved_region, m


*******************
* Self-employment *
*******************

	* Codes 8 and 9 (dk or na) become missing; tabulate before and after
	foreach var of varlist ER66198 ER66473 {
		tab `var', m
		mvdecode `var', mv(8 9)
		tab `var', m
	}

	/*Note: A respondent who reports working for *both* 
	        someone else and him/herself is coded as 
	        not self-employed: only code 3 counts.*/

	gen selfemployed = (ER66198==3) if ER66198<. & ER34503==10 //reference person
	replace selfemployed = (ER66473==3) if selfemployed==. & ER66473<. & ER34503!=10 //spouse
	tab selfemployed, m

***************************
* Education (Respondent) *
***************************

	/* Builds, from the completed-grade variables ER71538 (reference person)
	   and ER71539 (spouse/partner):
	     eduR           7-level attainment category (0-6)
	     yrsschool_bin  representative years of schooling per category
	     hs_ed, coll_ed attainment dummies, missing when eduR is missing */

	* Code 99 becomes missing; tabulate before and after as a check
	foreach var of varlist ER71538 ER71539  {
		tab `var',m 
		replace `var' =. if `var'==99 
		tab `var',m 
	}
	
	gen eduR =.
	
	//reference persons
	replace eduR =0 if ER71538==0 & ER34503==10 /*no schooling*/
	replace eduR =1 if inrange(ER71538,1,7) & ER34503==10 /*some grade school*/
	replace eduR =2 if ER71538==8 & ER34503==10 /*graduated grade school*/
	replace eduR =3 if inrange(ER71538,9,11) & ER34503==10 /*some high school*/
	replace eduR =4 if ER71538==12 & ER34503==10 /*completed high school*/
	replace eduR =5 if inrange(ER71538,13,15) & ER34503==10 /*some college*/
	replace eduR =6 if (ER71538>=16 & ER71538<.) & ER34503==10 /*completed college and beyond*/
	assert eduR ==. if ER34503!=10 

	//spouses & cohabitating partners
	replace eduR =0 if eduR==. & ER71539==0 & ER67399!=5 & ER34503!=10 /*no schooling*/ //Note: ER67399=5 means no spouse or partner
	replace eduR =1 if eduR==. & inrange(ER71539,1,7) & ER34503!=10 /*some grade school*/
	replace eduR =2 if eduR==. & ER71539==8 & ER34503!=10 /*graduated grade school*/
	replace eduR =3 if eduR==. & inrange(ER71539,9,11) & ER34503!=10 /*some high school*/
	replace eduR =4 if eduR==. & ER71539==12 & ER34503!=10 /*completed high school*/
	replace eduR =5 if eduR==. & inrange(ER71539,13,15) & ER34503!=10 /*some college*/
	replace eduR =6 if eduR==. & (ER71539>=16 & ER71539<.) & ER34503!=10 /*completed college and beyond*/
	tab eduR,m 
	
	* Representative years of schooling for each category
	gen yrsschool_bin=. 
	replace yrsschool_bin = 0 if eduR==0
	replace yrsschool_bin = 6 if eduR==1  
	replace yrsschool_bin = 8 if eduR==2
	replace yrsschool_bin = 10 if eduR==3
	replace yrsschool_bin = 12 if eduR==4
	replace yrsschool_bin = 14 if eduR==5
	replace yrsschool_bin = 16 if eduR==6
	tab yrsschool_bin , m
	label var yrsschool_bin "Years of school, binned" 
	
	* The if-condition names eduR in full. The previous "edu" worked only as
	* a variable-name abbreviation, which becomes ambiguous once edu_dad and
	* edu_mom exist and fails under "set varabbrev off".
	gen hs_ed = eduR>=4 if eduR<. 
	tab hs_ed, m
	label var hs_ed "HS educated" 

	gen coll_ed = eduR>=6 if eduR<.
	tab coll_ed, m 
	label var coll_ed "Coll educated" 

***********************
* Education (Parents) *
***********************

	gen edu_dad=.
	gen edu_mom=.

	/* One row per (parent, whose parent) combination, processed in order:
	     parent   education variable   gate variable   rows it applies to
	   A raw education code of 0 counts as "no schooling" only when the gate
	   variable equals 5.
	   NOTE(review): confirm the meaning of gate code 5 in the codebook. */
	local spec1 dad ER70845 ER70844 ER34503==10
	local spec2 mom ER70855 ER70854 ER34503==10
	local spec3 dad ER70707 ER70706 ER34503!=10
	local spec4 mom ER70717 ER70716 ER34503!=10

	forvalues s = 1/4 {

		tokenize `spec`s''
		local name  `1'
		local var   `2'
		local cond0 `3'==5
		local cond  `4'

		* The ==. guard never blocks an assignment here: the raw codes tested
		* below are mutually exclusive, and the reference-person and spouse
		* rows do not overlap.
		replace edu_`name' = 0 if edu_`name'==. & (`var'==0 & `cond0') & `cond' /*no schooling*/

		* raw codes 1-4 map one-to-one: some grade school, completed grade
		* school (grades 6-7 cannot be split from grade 8), some high school,
		* completed high school
		forvalues lvl = 1/4 {
			replace edu_`name' = `lvl' if edu_`name'==. & `var'==`lvl' & `cond'
		}

		replace edu_`name' = 5 if edu_`name'==. & inlist(`var',5,6) & `cond' /*some college OR high school + nonacademic training*/
		replace edu_`name' = 6 if edu_`name'==. & inlist(`var',7,8) & `cond' /* college+ */

		* raw codes that were left unassigned
		tab `var' if edu_`name'==. & `cond', m
	}

	tab edu_dad, m
	tab edu_mom, m

*Binned 
	foreach name in mom dad {

		* categories 0..6 map to 0, 6, 8, 10, 12, 14, 16 years of schooling
		gen edu_`name'_bin = .
		local lvl 0
		foreach yrs of numlist 0 6 8 10 12 14 16 {
			replace edu_`name'_bin = `yrs' if edu_`name'==`lvl'
			local ++lvl
		}
		tab edu_`name'_bin, m
		label var edu_`name'_bin "`name' Years of school from bins"

		gen `name'_hs_ed = edu_`name'>=4 if !missing(edu_`name')
		tab `name'_hs_ed, m

		gen `name'_coll_ed = edu_`name'>=6 if !missing(edu_`name')
		tab `name'_coll_ed, m

		label var `name'_hs_ed "`name' HS educated"
		label var `name'_coll_ed "`name' College educated"
	}

*********************************
* # of persons in R's household *
*********************************

* Children aged 0-17 living in the respondent's household
	generate R_totnumkids_0to17_livinginhh = ER66021
	tabulate R_totnumkids_0to17_livinginhh, missing
	label var R_totnumkids_0to17_livinginhh "Total # of kids (0-17) living in R's hh"

* Household size, with and without the respondent
	generate R_hhsize_plusR = ER66016
	tabulate R_hhsize_plusR, missing

	generate R_hhsize_minusR = R_hhsize_plusR - 1
	tabulate R_hhsize_minusR, missing

	label var R_hhsize_plusR "total # of persons in R's hh (including R)"
	label var R_hhsize_minusR "total # of persons in R's hh (NOT including R)"

* Whether respondent is a sibling
	* Rows found only in the sibling map are not kept; a match flags R as a sibling
	sort ER30001 ER30002
	merge 1:1 ER30001 ER30002 using ../PSID97_CrossSection/output/sib_map_wide.dta, keep(master match)

	generate sib_psid2017 = (_merge==3)
	label var sib_psid2017 "R is a sibling & present in 2017 wave"

	drop _merge
	drop SEX* ID68* PN* TYPE*

******************
* Unions *
******************

/*Note: A respondent whose job is covered by a union 
        contract is coded as being in a union. */

	//reference persons (ER66207): 1 -> union, 0 or 5 -> not union
	gen unionR = (ER66207==1) if inlist(ER66207,0,1,5) & ER34503==10
	assert inlist(ER66207,8,9) if unionR==. & ER34503==10
	assert unionR==. if ER34503!=10

	//spouses & cohabitating partners (ER66482): same coding
	replace unionR = (ER66482==1) if unionR==. & inlist(ER66482,0,1,5) & ER34503!=10
	assert inlist(ER66482,8,9) if unionR==. & ER34503!=10
	tab unionR, m
	
*******************
* Veterans *
*******************

	* Codes 0 and 9 become missing; tabulate before and after as a check
	foreach var of varlist ER70890 ER70752 {
		tab `var', m
		mvdecode `var', mv(0 9)
		tab `var', m
	}

	* veteran = 1 when the role-appropriate variable equals 1
	gen veteran = (ER70890==1) if ER70890<. & ER34503==10
	replace veteran = (ER70752==1) if ER70752<. & ER34503!=10
	tab veteran, m

*----------------------------------------------------------------------------------*
*----------------------------------------------------------------------------------*

/*------------------------------------------------------------
	Crosswalking 2017 PSID occupations (retrospective)
	to 1950 ANES occupations for fathers and mothers

	For each of the four parental occupation variables, the occupation
	code is copied into a temporary key (census2010), the crosswalk is
	merged on that key, and the crosswalk's fatheroccej_2010 column is
	attached under a parent- and role-specific name:
	fatheroccej_ref, fatheroccej_spouse, motheroccej_ref, motheroccej_spouse.
-------------------------------------------------------------*/

	ren ER70850 ref_dad_occ
	ren ER70712 spouse_dad_occ
	ren ER70860 ref_mom_occ
	ren ER70722 spouse_mom_occ
	
	foreach name in ref_dad spouse_dad ref_mom spouse_mom {
		
		* shortname = whose parent this is (ref or spouse)
		if ("`name'"=="ref_dad" | "`name'"=="ref_mom") local shortname "ref"
		if ("`name'"=="spouse_dad" | "`name'"=="spouse_mom") local shortname "spouse"
		
		gen census2010=`name'_occ 
		label var census2010 "==`name'_occ (renamed to facilitate a merge)" 
		
		//Bring in crosswalk
		preserve
		use ../Crosswalks/Crosswalk_2010Census_toANES.dta, clear
		
		* the crosswalk has a single target column, reused for both parents
		if ("`name'"=="ref_dad" | "`name'"=="spouse_dad") {
			ren fatheroccej_2010 fatheroccej_`shortname'	
			label var fatheroccej_`shortname' "father occ (`shortname')"
		}
		if ("`name'"=="ref_mom" | "`name'"=="spouse_mom") {
			ren fatheroccej_2010 motheroccej_`shortname'
			label var motheroccej_`shortname' "mother occ (`shortname')"
		}
		
		sort census2010
		tempfile crossw
		save `crossw'
		restore
			
		sort census2010
		merge m:1 census2010 using `crossw'
			
		* List the unmatched codes BEFORE asserting: a failed assert stops the
		* do-file, so the tabulation has to come first to be of any use.
		* This is the same order used in the respondent crosswalk further down.
		tab census2010 if _merge==1, m
		assert census2010==0 if _merge==1 
		drop if _merge==2
		drop _merge
		drop census2010

	}
	
	//Harmonized, coarsened father occupation and mother occupation
	* each person takes the role-appropriate column
	gen fatheroccej =.
	replace fatheroccej = fatheroccej_ref if ER34503==10
	replace fatheroccej = fatheroccej_spouse if ER34503!=10 & fatheroccej==.
	assert fatheroccej==. if (fatheroccej_ref==. & ER34503==10) | (fatheroccej_spouse==. & ER34503!=10) 
	tab fatheroccej, m
	
	gen motheroccej =.
	replace motheroccej = motheroccej_ref if ER34503==10
	replace motheroccej = motheroccej_spouse if ER34503!=10 & motheroccej==.
	assert motheroccej==. if (motheroccej_ref==. & ER34503==10) | (motheroccej_spouse==. & ER34503!=10)
	tab motheroccej, m

	
/*Note: Other than the "0" and "9999" codes, there is no 
        skip logic to the parental retrospective occupation 
        questions. See the PSID data center for info on 
        interpretation of these codes. */

*****************************************************************************************************
* DUMMIES FOR WHEN WE KNOW WHY DAD OR MOM DIDN'T WORK (I.E. WHY FATHEROCCEJ/MOTHEROCCEJ IS MISSING) *
*****************************************************************************************************

	/* For each parent:
	     1 = harmonized occupation is missing, the raw occupation code is 0,
	         and the role-appropriate variable (ER70880 for reference persons,
	         ER70742 for spouses/partners) equals 1
	     0 = harmonized occupation is known
	     . = otherwise */
	foreach parent in father mother {

		local short = cond("`parent'"=="father", "dad", "mom")

		gen `parent'_notworking = .
		replace `parent'_notworking = 1 if `parent'occej==. & ((ref_`short'_occ==0 & ER70880==1 & ER34503==10) | (spouse_`short'_occ==0 & ER70742==1 & ER34503!=10))
		replace `parent'_notworking = 0 if `parent'occej!=. & `parent'_notworking==.
		tab `parent'_notworking, m
	}

	/*Note: There are no occupation codes for "retired" or "disabled". 
	        All reasons for a parent not working are covered by the 
	        "0" or "9999" occupation codes.*/


**************************************************************************
* Fix occupations for self-employed businessmen, managers, or officials * 
**************************************************************************
/*Note: Info on whether R's mom or dad were self-employed when R 
        was growing up is not available in 2017 or in any wave. */

*------------------------------------------
* Head of hh dummies + father_imputed dummy	
*------------------------------------------

	* Codes 0 and 9 become missing in the "lived with both parents" variables
	local who_ER70742 SP
	local who_ER70880 RP

	foreach var in ER70742 ER70880 {
		tab `var', m
		mvdecode `var', mv(0 9)
		label var `var' "WTR LIVED W/BOTH PARENTS-`who_`var'' (cleaned) 17"
		tab `var', m
	}

/* Note: Following the coding in other surveys: 
		--headofhh_father =1 if R lived with both parents when growing up 
		  or if R reports having lived with only a father. 
		--headofhh_mother =1 if R lived with a mother but not a father
		--headofhh_othermale =1 if R lived with a male relative and not 
		  with R's parents
		--headofhh_otherfemale =1 if R lived with a female relative and 
		  not with R's parents or a male relative. 
*/

	* Start from the role-appropriate "lived with both parents" answer
	gen headofhh_father = .
	replace headofhh_father = (ER70880==1) if ER70880<. & ER34503==10   //reference persons
	replace headofhh_father = (ER70742==1) if ER70742<. & ER34503!=10   //spouses & cohabitating partners

	* Switch 0 to 1 when only the father's occupation is reported
	* (same rule for both roles)
	replace headofhh_father = 1 if headofhh_father==0 & fatheroccej!=. & motheroccej==.
	tab headofhh_father, m

	* Mother: 0 when the father is head; 1 when the father is not head and
	* only the mother's occupation is reported
	gen headofhh_mother = .
	replace headofhh_mother = 0 if headofhh_father==1
	replace headofhh_mother = 1 if fatheroccej==. & motheroccej!=. & headofhh_father==0
	tab headofhh_mother, m

	* Other male / other female: 0 in both of the cases above, missing otherwise
	foreach other in othermale otherfemale {
		gen headofhh_`other' = 0 if headofhh_father==1 | (fatheroccej==. & motheroccej!=. & headofhh_father==0)
		tab headofhh_`other', m
	}

	label var headofhh_father "Head of hh when R was growing up was R's father"
	label var headofhh_mother "Head of hh when R was growing up was R's mother"
	label var headofhh_othermale "Head of hh when R was growing up was some other male (not R's father)"
	label var headofhh_otherfemale "Head of hh when R was growing up was some other female (not R's mother)"

	//Alternate dummy for hh head being R's father 
	     /* Note: When R reports an occupation for their father but 
	              gives no information on who they lived with when 
	              growing up, R is assumed to have lived with the father.
	     */
	gen headofhh_father_imputed = headofhh_father
	replace headofhh_father_imputed = 1 if headofhh_father==. & fatheroccej!=.
	label var headofhh_father_imputed "Impute dad when parent occ != missing & no info about hh head at age 16"
	tab headofhh_father_imputed, m
	

* Father was a farm laborer or operator (harmonized codes 71 and 81);
* missing when the father's harmonized occupation is missing
	gen fatherfarm = inlist(fatheroccej,71,81) if fatheroccej!=.
	tab fatherfarm, m

/*--------------------------------------------------------------------
	Crosswalking 2017 PSID occupations to 1950 ANES
	occupations for adult respondents
---------------------------------------------------------------------*/

	rename ER66195 ref_occ
	rename ER66470 spouse_occ

	foreach name in ref spouse {

		* temporary merge key holding this role's occupation code
		gen census2010 = `name'_occ
		label var census2010 "==`name'_occ (renamed to facilitate a merge)"

		//Crosswalk copy whose target column is named after this role
		tempfile crossw
		preserve
			use ../Crosswalks/Crosswalk_2010Census_toANES.dta, clear
			rename fatheroccej_2010 occRej_`name'
			sort census2010
			save `crossw'
		restore

		* rows found only in the crosswalk are not kept
		sort census2010
		merge m:1 census2010 using `crossw', keep(master match)

		* unmatched respondent codes must all be 0
		tab census2010 if _merge==1, m
		assert census2010==0 if _merge==1
		drop _merge census2010

	}

/*Note: Other than the "0" and "9999" codes, there is no 
        skip logic to the respondent occupation 
        questions. See the PSID data center for info on 
        interpretation of these codes. */

***************************************************************************
* Fix occupations for self-employed businessmen, managers, or officials  * 
***************************************************************************

	* Harmonized code 28 is recoded to 21 for self-employed rows;
	* tabulate before and after as a check
	foreach name in ref spouse {
		tab selfemployed if occRej_`name'==28, m
		replace occRej_`name' = 21 if selfemployed==1 & occRej_`name'==28
		tab selfemployed if occRej_`name'==28, m
	}

	//Harmonized, coarsened respondent occupation: role-appropriate column
	gen occRej = cond(ER34503==10, occRej_ref, occRej_spouse)
	assert  occRej==. if (occRej_ref==. & ER34503==10) | (occRej_spouse==. & ER34503!=10)
	tab occRej, m
			
*----------------------------------------------------------------------*
*----------------------------------------------------------------------*

/*------------------------------------------------
	Family Income							

	Built from total family income (ER71426):
	  totfaminc_negs_as_zero   income with negative values set to 0
	  lnfaminc_nobin           log of the above (missing when income is 0)
	  fam_inc                  binned income (see the note below)
	  bottomcoded_son/topcoded_son   flags for the lowest / highest bin
	  fam_inc_real, lnfaminc   binned income in 1950 dollars, and its log
-------------------------------------------------*/

* Logged, continuous measure of family income 
	clonevar totfaminc_negs_as_zero = ER71426
	replace totfaminc_negs_as_zero =0 if totfaminc_negs_as_zero<0
	
	local lab: var label ER71426
	label var totfaminc_negs_as_zero "`lab' (no neg values)"
	
	gen lnfaminc_nobin = ln(totfaminc_negs_as_zero) 

* Create binned measure
/* Note:
		The midpoint of each bin is assigned, with the exception of:
 			(1) the last bin, whose bottom value is multiplied by 1.25 
 	    	(as last bin is always "open-ended"--i.e. "25,000 or more")
 			(2) the bottom bin, whose top value is multiplied by 0.75
*/

	gen fam_inc=.
	replace fam_inc=0.75*15000 if ER71426<15000 //<15k
	replace fam_inc=20000 if ER71426>=15000 & ER71426<25000 //15-25k
	replace fam_inc=30000 if ER71426>=25000 & ER71426<35000 //25-35k	
	replace fam_inc=40000 if ER71426>=35000 & ER71426<45000 //35-45k	
	replace fam_inc=52500 if ER71426>=45000 & ER71426<60000 //45-60k
	replace fam_inc=67500 if ER71426>=60000 & ER71426<75000 //60-75k	
	replace fam_inc=82500 if ER71426>=75000 & ER71426<90000 //75-90k
	replace fam_inc=100000 if ER71426>=90000 & ER71426<110000 //90-110k	
	replace fam_inc=120000 if ER71426>=110000 & ER71426<130000 //110-130k
	replace fam_inc=142500 if ER71426>=130000 & ER71426<155000 //130-155k
	replace fam_inc=177500 if ER71426>=155000 & ER71426<200000 //155-200k
	* Missing values compare as larger than any number, so the open-ended top
	* bin needs an explicit "not missing" guard; without it a missing income
	* would be coded as 250,000 instead of staying missing.
	replace fam_inc=1.25*200000 if ER71426>=200000 & ER71426<. //200k+
	tab fam_inc, m
	
	gen bottomcoded_son = fam_inc==0.75*15000 if fam_inc<. 
	tab bottomcoded_son, m 

	gen topcoded_son = fam_inc==1.25*200000 if fam_inc<. 
	tab topcoded_son, m 

	label var bottomcoded_son "Respondent family income, bottom coded" 
	label var topcoded_son "Respondent family income, top coded" 
	
/* Convert family income variable to 1950 dollars
   using the CPI: https://data.bls.gov/timeseries/CUUR0000SA0 

   Note: In the 2017 questionnaire, the total family income 
         question asks about 2016 income. Will use CPI in 2016.
*/ 

	gen CPI1950 = 24.1 
	gen CPI2016 = 240.007

	gen fam_inc_real =. 
	replace fam_inc_real = fam_inc * (CPI1950/CPI2016) 
	label var fam_inc_real "Binned Family income, in 1950 dollars" 
	
* Logged + binned
	gen lnfaminc=ln(fam_inc_real) 
	label var lnfaminc "Logged family income, binned and real" 

/*---------------------------------
	Birth cohorts							
----------------------------------*/

	/*Note: A few respondents (<1%) are interviewed in 2018 
	        and the rest in 2017. Everyone is assigned 2017 as 
	        the survey year; otherwise these few respondents 
	        would receive their own survey-year FEs in the 
	        main regressions. */

	generate year = 2017
	tabulate year, missing
	label var year "Survey year"

	* Year of birth; code 9999 is left missing
	generate dob = ER34506 if ER34506!=9999
	tabulate dob, missing
	label var dob "Year of birth"

	* Decade of birth for birth years 1960-1989; missing otherwise
	generate decade = .
	foreach start of numlist 1960(10)1980 {
		replace decade = `start' if inrange(dob, `start', `start'+9)
	}
	tabulate decade, missing
	label var decade "Decade of birth"

	//one dummy per decade
	tabulate decade, gen(decade_)

/*----------------------------------
	FINAL RESTRICTIONS:
-----------------------------------*/ 

	/* 1) Drop any 2017 respondents that are also in the 
	      1997 cleaned sample */

	* Person identifiers of the 1997 cleaned sample
	tempfile nopanel
	preserve
		use ER30001 ER30002 using ../PSID97_CrossSection/output/psid1997_cleaned.dta, clear
		sort ER30001 ER30002
		save `nopanel'
	restore

	* Keep only the 2017 rows with no match in 1997: matched rows (people in
	* both samples) and 1997-only rows are both discarded
	sort ER30001 ER30002
	merge 1:1 ER30001 ER30002 using `nopanel', keep(master) nogenerate
	
	/* 2)  Randomly choose 1 person (reference person OR spouse/partner)
           in cases of 2-person households */
		
	// Number of rows (persons) in each ER30001 x ER66002 group
	bysort ER30001 ER66002: egen numpersons_subfamily = sum(1)
	tab numpersons_subfamily, m
	
	// good4analysis = 1 when race, father's occupation, childhood region and
	// real family income are all non-missing
	gen good4analysis = (race!=. & fatheroccej!=. & region4_childhood!=. & fam_inc_real!=.)
	tab good4analysis ,m 
	
	// Number of persons in the group with complete information
	bysort ER30001 ER66002: egen total_good4analysis_subfamily = total(good4analysis==1)
	tab total_good4analysis_subfamily,m 
	
	// Fix the row order with the unique person identifier BEFORE drawing random
	// numbers, so that a given RNG state assigns the same draw to the same person
	sort ER30001 ER30002 //MUST ALWAYS SORT BY UNIQUE ID COMBO
	
	gen random = runiform() //Generate random # between 0 and 1 
	sort ER30001 ER66002 random //Note: 2017 family id (ER66002) tells us who lived together/is part of the same subfamily unit.
	by ER30001 ER66002: gen analysis_sample = _n==1 //Grab first person listed in each hh. Order is determined by randomly assigned value.
	
	/*Note: in 2-person families, want to make sure that we grab the person
	        (if there's only one) that has all the necessary info available. */
	// Override the random pick in 2-person groups where exactly one person has
	// complete information: select that person and deselect the other
	replace analysis_sample = 1 if (good4analysis==1 & total_good4analysis_subfamily==1 & numpersons_subfamily==2) & analysis_sample==0
	replace analysis_sample = 0 if (good4analysis==0 & total_good4analysis_subfamily==1 & numpersons_subfamily==2) & analysis_sample==1
	tab analysis_sample,m 
	
	keep if analysis_sample==1
	
	// Re-count persons per group to check the selection
	bysort ER30001 ER66002: egen numpersons_subfamily2 = sum(1)
	tab numpersons_subfamily2, m //Confirmed: 1 person per 2017 family
	drop numpersons_subfamily2
 	
/*-----------------------------------------
		Save			

	Adds a missing-income flag and a single person identifier, then
	writes the cleaned file to ./output/psid2017_cleaned.
------------------------------------------*/ 

	gen faminc_missing = fam_inc_real==.
	label var faminc_missing "Family income missing"

	* The identifier is stored as long, not the default float: a float holds
	* integers exactly only up to 16,777,216, so an explicit integer type keeps
	* the ID exact whatever the magnitude of ER30001.
	gen long id_psid2017 = (ER30001*1000) + ER30002 //coded according to PSID instructions

	* duplicates report only prints a summary; it does not stop on duplicates
	duplicates report id_psid2017 //no duplicates
	sort id_psid2017
	order id_psid2017 weight_psid2017
	compress
	save ./output/psid2017_cleaned, replace 

